Notes

Ok we’ve looked at a lot, maybe streamline to investigate grid size and whether a different grid size can make things more robust

  • we did 8x8 so we can go smaller or larger

  • 5x5, 6x6, 10x10

  • could do ensemble members instead of ensemble averages

  • maybe we don’t do the som at all, maybe we just plot a 4 panel map for each ESM with the color coming from T, P, iasd. The color is sort of interpretable, especially with all of the same ESMs on the same color wheel

helper function to process time series

And some package and labeling info

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(kohonen)
library(vegan)  # for vegdist function which gives a dissimilarity index
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-4
timeseries_dir <- 'extracted_timeseries/'

# get ecs ordering/labels
esm_labels <- read.csv(paste0(timeseries_dir,'global_tas_allesms.csv'), stringsAsFactors = FALSE) %>%
  select(esm) %>% distinct %>% 
  mutate(plotesm = paste0(letters[as.integer(row.names(.))], '.', esm),
         ECS_order = as.integer(row.names(.)))
print(esm_labels)
##              esm         plotesm ECS_order
## 1    CAMS-CSM1-0   a.CAMS-CSM1-0         1
## 2         MIROC6        b.MIROC6         2
## 3      GFDL-ESM4     c.GFDL-ESM4         3
## 4      FGOALS-g3     d.FGOALS-g3         4
## 5  MPI-ESM1-2-HR e.MPI-ESM1-2-HR         5
## 6  MPI-ESM1-2-LR f.MPI-ESM1-2-LR         6
## 7     MRI-ESM2-0    g.MRI-ESM2-0         7
## 8  ACCESS-ESM1-5 h.ACCESS-ESM1-5         8
## 9   IPSL-CM6A-LR  i.IPSL-CM6A-LR         9
## 10   CESM2-WACCM   j.CESM2-WACCM        10
## 11   UKESM1-0-LL   k.UKESM1-0-LL        11
## 12       CanESM5       l.CanESM5        12
process_time_series <- function(time_series_df, esm_label_info,
                                hist_start = 1995, hist_end = 2014){
  # Get ensemble member values for projection runs:
  # 1. 2080-2099 average
  # 2. loess detrend each ensemble member to get IAV
  #
  # split by run
  non_hist <- time_series_df %>% 
    filter(experiment != 'historical') %>% 
    select(year, ann_agg, esm, experiment, ensemble, variable, region)
  
  grouped <- split(non_hist, f = list(non_hist$esm, 
                                      non_hist$experiment,
                                      non_hist$ensemble, 
                                      non_hist$variable ,
                                      non_hist$region) )
  # split creates group of every possible combo of the variables and fills in
  # empty dataframes for the ones that don't exist in data. Drop those
  grouped <- grouped[lapply(grouped, nrow)>0]
  
  processed_groups <- lapply(grouped, FUN = function(run_df){
    loess_resids <- loess(run_df$ann_agg ~ run_df$year)$residuals
    
    run_df %>%
      filter(year >= 2080, year <= 2099) %>%
      group_by(esm, experiment, ensemble, variable, region) %>%
      summarise(average_2080_2099 = mean(ann_agg)) %>% 
      ungroup  %>%
      mutate(iasd = sd((loess_resids))) ->
      output_df
    
    return(output_df)
    
  })
  
  individual_stats <- do.call(bind_rows, processed_groups)
  rm(non_hist)
  rm(grouped)
  rm(processed_groups)
  
  # calculate ensemble averages
  individual_stats %>%
    group_by(esm, experiment, variable,region) %>%
    summarise(average_2080_2099 = mean(average_2080_2099),
              iasd = mean(iasd)) %>%
    ungroup ->
    ensemble_stats
  
  
  # get ensemble average historical average value:
  time_series_df %>%
    filter(experiment == 'historical',
           year >= hist_start,
           year <= hist_end) %>%
    group_by(esm, experiment, ensemble, variable, region) %>%
    summarise(historical_average = mean(ann_agg)) %>%
    ungroup %>%
    group_by(esm, experiment, variable, region) %>%
    summarise(historical_average = mean(historical_average)) %>%
    ungroup %>%
    select(-experiment) ->
    historical_ens
  
  
  # shape and calculate changes for plotting:
  ensemble_stats %>%
    left_join(historical_ens, by = c('esm', 'variable', 'region')) %>%
    left_join(esm_label_info, by = 'esm') %>%
    mutate(change = average_2080_2099 - historical_average,
           pct_change = 100*(average_2080_2099 - historical_average)/historical_average) ->
    plot_tbl
  
return(plot_tbl)
}


process_ens_time_series <- function(time_series_df, esm_label_info,
                                hist_start = 1995, hist_end = 2014){
  # Get ensemble member values for projection runs:
  # 1. 2080-2099 average
  # 2. loess detrend each ensemble member to get IAV
  #
  # split by run
  non_hist <- time_series_df %>% 
    filter(experiment != 'historical') %>% 
    select(year, ann_agg, esm, experiment, ensemble, variable, region)
  
  grouped <- split(non_hist, f = list(non_hist$esm, 
                                      non_hist$experiment,
                                      non_hist$ensemble, 
                                      non_hist$variable ,
                                      non_hist$region) )
  # split creates group of every possible combo of the variables and fills in
  # empty dataframes for the ones that don't exist in data. Drop those
  grouped <- grouped[lapply(grouped, nrow)>0]
  
  processed_groups <- lapply(grouped, FUN = function(run_df){
    loess_resids <- loess(run_df$ann_agg ~ run_df$year)$residuals
    
    run_df %>%
      filter(year >= 2080, year <= 2099) %>%
      group_by(esm, experiment, ensemble, variable, region) %>%
      summarise(average_2080_2099 = mean(ann_agg)) %>% 
      ungroup  %>%
      mutate(iasd = sd((loess_resids))) ->
      output_df
    
    return(output_df)
    
  })
  
  individual_stats <- do.call(bind_rows, processed_groups)
  rm(non_hist)
  rm(grouped)
  rm(processed_groups)
  
  # calculate ensemble averages
  individual_stats ->
    ensemble_stats
  
  
  # get ensemble average historical average value:
  time_series_df %>%
    filter(experiment == 'historical',
           year >= hist_start,
           year <= hist_end) %>%
    group_by(esm, experiment, ensemble, variable, region) %>%
    summarise(historical_average = mean(ann_agg)) %>%
    ungroup %>%
    group_by(esm, experiment, variable, region) %>%
    summarise(historical_average = mean(historical_average)) %>%
    ungroup %>%
    select(-experiment) ->
    historical_ens
  
  
  # shape and calculate changes for plotting:
  ensemble_stats %>%
    left_join(historical_ens, by = c('esm', 'variable', 'region')) %>%
    left_join(esm_label_info, by = 'esm') %>%
    mutate(change = average_2080_2099 - historical_average,
           pct_change = 100*(average_2080_2099 - historical_average)/historical_average) ->
    plot_tbl
  
return(plot_tbl)
}

prep_esm_TP_data <- function(esmname){
  
  region_timeseries <- read.csv(paste0(timeseries_dir, 'IPCC_land_regions_tas_', esmname, '_timeseries_1980_2099.csv'),
                              stringsAsFactors = FALSE) %>% mutate(region = acronym)
  
  region_tas_summary <- suppressMessages(process_time_series(time_series_df = region_timeseries, esm_label_info = esm_labels))
  
  region_pr_summary <- suppressMessages(process_time_series(time_series_df = 
                                                            read.csv(paste0(timeseries_dir, 'IPCC_land_regions_pr_', esmname,'_timeseries_1980_2099.csv'),
                                                                     stringsAsFactors = FALSE) %>%
                                                            rename(ann_agg=pr) %>% 
                                                            mutate(region = acronym) ,
                                                          esm_label_info = esm_labels))
  
  # reshape so each row is an observation
  # observation = esm - experiment - region - tas change-tas iasd - pr pct change
  region_tas_summary %>% 
    select(esm, experiment, region, iasd, change) %>%
    rename(tas_change = change) %>%
    left_join(region_pr_summary %>%
                select(esm, experiment, region, pct_change) %>% 
                rename(pr_pct = pct_change),
              by = c('esm', 'experiment', 'region')) ->
    region_summary

return(region_summary)
  
}

load for an ESM

Consider an observation = esm - experiment - region : tas change-tas iasd - pr pct change

# region_summary <- prep_esm_TP_data(esmname =  'GFDL-ESM4')
# region_summary %>%
#   bind_rows(prep_esm_TP_data(esmname =  'CESM2-WACCM')) %>%
#    bind_rows(prep_esm_TP_data(esmname =  'CAMS-CSM1-0'))->
#   region_summary_main
# write.csv(region_summary_main, 'CAMS_GFDL_CESM2-WACCM_summaries.csv', row.names = F)

region_summary_main <- read.csv('CAMS_GFDL_CESM2-WACCM_summaries.csv', stringsAsFactors = FALSE)
# make a copy to operate on
region_summary <- region_summary_main
print(head(region_summary))
##         esm experiment region      iasd tas_change     pr_pct
## 1 GFDL-ESM4     ssp119    ARP 0.2652738  0.2610944  2.9927902
## 2 GFDL-ESM4     ssp119    CAF 0.2703431  0.5037959 -1.9838156
## 3 GFDL-ESM4     ssp119    CAR 0.1574689  0.2216722 -0.3187443
## 4 GFDL-ESM4     ssp119    CAU 0.5345113  0.4218907 -3.8398722
## 5 GFDL-ESM4     ssp119    CNA 0.4910504  0.9446315  6.3459839
## 6 GFDL-ESM4     ssp119    EAS 0.2584605  0.7250376 10.5204045

Spatial info

default SOM packages can only operate on numerical data, not categorical. So we have to assign some amount of spatial location numerical info to each acronym. Ideally mean lat-lon in the shape?

Also we want the shapes for plotting anyway, so prep them

library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
shp <- st_read(dsn = 'IPCC-WGI-reference-regions-v4_shapefile/IPCC-WGI-reference-regions-v4.shp', stringsAsFactors = F)
## Reading layer `IPCC-WGI-reference-regions-v4' from data source 
##   `/Users/snyd535/Documents/GitHub/stitches_in_r/R/inst/shinyApp/python_curation/IPCC-WGI-reference-regions-v4_shapefile/IPCC-WGI-reference-regions-v4.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 58 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -90 xmax: 180 ymax: 90
## Geodetic CRS:  WGS 84
# add a numerical region id
shp %>% 
  mutate(region_id = as.integer(row.names(.))) ->
  shp

# add coordinate info probably
shp1 <-  st_transform(shp, "+proj=longlat +ellps=WGS84 +datum=WGS84")

# extract
coords <- as.data.frame(st_coordinates(shp1))


# get a mean lon and lat value in each shape
coords %>%
  rename(lon = X, lat = Y, region_id = L3) %>%
  left_join(as.data.frame(shp) %>% select(region_id, Acronym), by = 'region_id') %>%
  filter(grepl('PO', Acronym)) %>% 
  # have to have lon on 0:360 so th pacific ocean behaves even though not
  # looking at that here
  mutate(lon_360 = if_else(lon >=0, lon, lon+360))%>%
  group_by(region_id) %>%
  summarise(mean_lon = mean(lon_360),
            mean_lat = mean(lat)) %>%
  ungroup  %>%
  mutate(mean_lon = if_else(mean_lon >= 0 & mean_lon <= 180, 
                            mean_lon, mean_lon - 360) ) ->
  mean_pts_PO

coords %>%
  rename(lon = X, lat = Y, region_id = L3) %>%
  left_join(as.data.frame(shp) %>% select(region_id, Acronym), by = 'region_id') %>%
  filter(!grepl('PO', Acronym)) %>% 
  # have to have lon on 0:360 so th pacific ocean behaves even though not
  # looking at that here
  group_by(region_id) %>%
  summarise(mean_lon = mean(lon),
            mean_lat = mean(lat)) %>%
  ungroup  %>% 
  bind_rows(mean_pts_PO)->
  mean_pts 
# Join to the shape file and make sure this very simple way of
# doing things ends up with a lon lat that is actually in each region
shp %>%
  left_join(mean_pts, by = 'region_id') ->
  shp

ggplot() +
  geom_sf(data = shp  ) +
  geom_point(data = shp, mapping = aes(x = mean_lon, y = mean_lat), color = 'red') +
  geom_text(data = shp, mapping = aes(label = Acronym, x = mean_lon, y= mean_lat), size =5)

test - convert to rgb first

we have 3 variables per observarion = experimentXregion -> convert to rgb values

  • looking across ESMs, we’ll want to make sure we have them all on consistent range before converting to (0,255)

  • each color/family of colors does have a physical interpretation in terms of iasd, t, p

  • red = temperature

  • blue = precip

  • iasd = green

# convert to RGB
region_summary %>%
  filter(experiment != 'ssp119') ->
  region_summary 
region_summary$r <- scales::rescale(region_summary$tas_change, to =c(0,255))
region_summary$g <- scales::rescale(region_summary$iasd, to =c(0,255))
region_summary$b <- scales::rescale(region_summary$pr_pct, to =c(0,255))


# add spatial numerical info
region_summary %>%
  left_join(as.data.frame(shp) %>% select(Acronym, mean_lon, mean_lat), by = c('region' = 'Acronym')) %>%
  rename(lon  = mean_lon, lat = mean_lat) %>%
  # add the original colors 
  mutate(orig_color = rgb(.$r, .$g, .$b, maxColorValue  = 255),
         color_order = as.integer(row.names(.))) ->
  region_summary

region_summary %>%
  select(color_order, orig_color) %>%
  distinct() ->
  colors

region_numerical <- as.data.frame(region_summary[c('lon', 'lat', 'r', "g", "b")])

Plot directly

  • Green dominant = iasd dominates T and P

  • red dominant = T dominates iasd and P

  • blue dominant = P dominates T and IASD

  • black: magnitude of (r,g,b) overall small/near min value of T,P, IASD across SSPs and ESMs as set up

  • white: magnitude of (r,g,b) overall large/near max value of T,P, IASD across SSPs and ESMs as set up

MAJOR downside - don’t have a clear increasing vs decreasing interpretation with how it’s set up.

  • for temperature, smallest change is -0.03199914 deg C, so effectively only ever increasing Temp
  • IASD strictly >0
  • That leaves precip with no clear interpretable demarcation between increase and decrease; and b=127 is not the same as pr=0% change because range of values is not symmetrical about 0 in data plotting
shp %>% 
  left_join(region_summary, by = c('Acronym' = 'region')) %>%
  left_join(esm_labels %>% select(esm, plotesm), by = 'esm') ->
    shp_esms

p<- ggplot() +
    geom_sf(data = shp_esms %>% na.omit, aes(fill = as.factor(color_order)) ) +
  scale_fill_manual(values =colors$orig_color)+
  facet_grid(plotesm~experiment ) +
  theme(legend.position = 'none') 

ggsave('python_curation_figs/CAMS_GFDL_CESM_rawdata.png', p, width =8, units = 'in')
## Saving 8 x 5 in image
p

# only decreasing precip
shp_esms %>%
  mutate(precip_change = if_else(pr_pct >=0, 'inc', 'dec'))->
  shp_esms2


ggplot() +
    geom_sf(data = shp_esms2 %>% na.omit, aes(fill = as.factor(color_order), color = precip_change )) +
  scale_fill_manual(values =colors$orig_color)+
  scale_color_manual(values = c('red', 'blue'))+
  facet_grid(plotesm~experiment ) +
  theme(legend.position = 'none') 

ggplot() +
    geom_sf(data = shp_esms2 %>% na.omit, aes(color = precip_change )) +
  scale_fill_manual(values =colors$orig_color)+
  scale_color_manual(values = c('red', 'blue'))+
  facet_grid(plotesm~experiment ) 

ggplot() +
    geom_sf(data = shp_esms2 %>% na.omit, aes(fill = as.factor(color_order), color = precip_change )) +
  scale_fill_manual(values =colors$orig_color)+
  scale_color_manual(values = c('yellow', 'blue'))+
  facet_grid(plotesm~experiment ) +
  theme(legend.position = 'none') 

ggplot() +
    geom_sf(data = shp_esms2 %>% na.omit, aes(color = precip_change )) +
  scale_fill_manual(values =colors$orig_color)+
  scale_color_manual(values = c('yellow', 'blue'))+
  facet_grid(plotesm~experiment ) 

# same thing but black is low magnitude therefore decreasing P, white is high and therefore inc P
ggplot() +
    geom_sf(data = shp_esms2 %>% na.omit, aes(fill = as.factor(color_order), color = precip_change )) +
  scale_fill_manual(values =colors$orig_color)+
  scale_color_manual(values = c('black', 'white'))+
  facet_grid(plotesm~experiment ) +
  theme(legend.position = 'none') 

ggplot() +
    geom_sf(data = shp_esms2 %>% na.omit, aes( color = precip_change )) +
  scale_color_manual(values = c('black', 'white'))+
  facet_grid(plotesm~experiment ) +
  theme(legend.position = 'none') 

3d plot to get a sense of the legend

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
fig <- plot_ly(region_summary, x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = region_summary$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp126'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = (region_summary %>% filter(experiment == 'ssp126'))$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp245'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = (region_summary %>% filter(experiment == 'ssp245'))$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp370'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = (region_summary %>% filter(experiment == 'ssp370'))$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp585'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
               colors = (region_summary %>% filter(experiment == 'ssp585'))$orig_color)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode